library(tidyverse)
## -- Attaching packages ----------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts -------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
Print a graph (different from the one above) to a png file using 3*ppi for the height and width and display the png file in the report using the above R Markdown format.
# Load the global deaths time series and give the two slash-containing
# columns syntactic names so they can be used without backticks later.
time_series_deaths <- rename(
  read_csv("data/time_series_covid19_deaths_global.csv"),
  Province_State = `Province/State`,
  Country_Region = `Country/Region`
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Reshape from one column per day to one row per (region, day), then turn
# the former column names (month/day/year strings) into Date values.
time_series_deaths_long <- time_series_deaths %>%
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long),
    names_to = "Date",
    values_to = "Deaths"
  ) %>%
  mutate(Date = mdy(Date))
# Write the worldwide cumulative death curve to a PNG file.
# Width and height are given in pixels (3 * ppi) with res = ppi, so the
# small font sizes set in theme() below stay legible at this resolution.
ppi <- 300

# Build the plot first so that rendering is a separate, explicit step.
deaths_png_plot <- time_series_deaths_long %>%
  group_by(Date) %>%
  summarise(Deaths = sum(Deaths), .groups = "drop") %>%
  ggplot(aes(Date, Deaths, group = 1)) +
  geom_point() +
  geom_line() +
  ggtitle("Worldwide COVID-19 Deaths") +
  theme(plot.title = element_text(size = 10), text = element_text(size = 8))

png(
  "images/time_series_worldwide_death.png",
  width = 3 * ppi,
  height = 3 * ppi,
  res = ppi
)
# Explicit print(): a ggplot object is only drawn by top-level autoprinting,
# so without it the PNG is empty when this code is run through source() or
# from inside a function.
print(deaths_png_plot)
dev.off()
Worldwide COVID-19 Deaths
Turn one of the exercises from Lab 5 into an interactive graph with plotly.
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Total deaths per day across every row of the long table.
worldwide_deaths <- summarise_at(
  group_by(time_series_deaths_long, Date),
  c("Deaths"),
  sum
)

# Assemble the static ggplot layer by layer, then hand it to plotly to get
# the interactive version of the same figure.
p <- ggplot(worldwide_deaths, aes(Date, Deaths, group = 1))
p <- p + geom_point() + geom_line()
p <- p + ggtitle("Worldwide COVID-19 Deaths")
ggplotly(p)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
Create an animated graph of your choosing using the time series data to display an aspect (e.g. states or countries) of the data that is important to you.
library(gganimate)
library(transformr)
# Make the black-and-white theme the default for every ggplot built after
# this point (theme_set() changes the session-wide default theme).
theme_set(theme_bw())
# Read the joined confirmed/deaths table with explicit column types.
# Province_State is empty for the leading rows of the file, so readr's type
# guessing infers a logical column and then turns every real province name
# into NA with a parsing failure. Declaring the types avoids that data loss.
# Date stays character here; it is parsed with mdy() further down.
time_series_long_joined <- read_csv(
  "data/time_series_long_joined.csv",
  col_types = cols(
    Province_State = col_character(),
    Country_Region = col_character(),
    Date = col_character(),
    .default = col_double()
  )
)
## Parsed with column specification:
## cols(
## Province_State = col_logical(),
## Country_Region = col_character(),
## Lat.x = col_double(),
## Long.x = col_double(),
## Date = col_character(),
## Confirmed = col_double(),
## Lat.y = col_double(),
## Long.y = col_double(),
## Deaths = col_double()
## )
## Warning: 20250 parsing failures.
## row col expected actual file
## 2001 Province_State 1/0/T/F/TRUE/FALSE Australian Capital Territory 'data/time_series_long_joined.csv'
## 2002 Province_State 1/0/T/F/TRUE/FALSE Australian Capital Territory 'data/time_series_long_joined.csv'
## 2003 Province_State 1/0/T/F/TRUE/FALSE Australian Capital Territory 'data/time_series_long_joined.csv'
## 2004 Province_State 1/0/T/F/TRUE/FALSE Australian Capital Territory 'data/time_series_long_joined.csv'
## 2005 Province_State 1/0/T/F/TRUE/FALSE Australian Capital Territory 'data/time_series_long_joined.csv'
## .... .............. .................. ............................ ..................................
## See problems(...) for more details.
# Drop the duplicated coordinate columns and add the deaths-to-confirmed
# ratio. Columns are removed by name rather than by position so the step
# keeps working if the column order of the input file changes.
time_series_long_joined_plus <- time_series_long_joined %>%
  select(-c(Lat.x, Long.x, Lat.y, Long.y)) %>%
  mutate(
    Deaths_per_Confirmed = Deaths / Confirmed,
    # 0 / 0 yields NaN on days with no confirmed cases; report those as 0.
    Deaths_per_Confirmed = replace(
      Deaths_per_Confirmed,
      is.nan(Deaths_per_Confirmed),
      0
    )
  )
head(time_series_long_joined_plus)
## # A tibble: 6 x 6
## Province_State Country_Region Date Confirmed Deaths Deaths_per_Confirmed
## <lgl> <chr> <chr> <dbl> <dbl> <dbl>
## 1 NA Afghanistan 1/22/20 0 0 0
## 2 NA Afghanistan 1/23/20 0 0 0
## 3 NA Afghanistan 1/24/20 0 0 0
## 4 NA Afghanistan 1/25/20 0 0 0
## 5 NA Afghanistan 1/26/20 0 0 0
## 6 NA Afghanistan 1/27/20 0 0 0
# Stack every non-identifier column into a single Counts column labelled by
# Report_Type, and convert the month/day/year strings to Date values.
time_series_long_joined_counts <- time_series_long_joined_plus %>%
  pivot_longer(
    cols = -c(Province_State, Country_Region, Date),
    names_to = "Report_Type",
    values_to = "Counts"
  ) %>%
  mutate(Date = mdy(Date))
head(time_series_long_joined_counts)
## # A tibble: 6 x 5
## Province_State Country_Region Date Report_Type Counts
## <lgl> <chr> <date> <chr> <dbl>
## 1 NA Afghanistan 2020-01-22 Confirmed 0
## 2 NA Afghanistan 2020-01-22 Deaths 0
## 3 NA Afghanistan 2020-01-22 Deaths_per_Confirmed 0
## 4 NA Afghanistan 2020-01-23 Confirmed 0
## 5 NA Afghanistan 2020-01-23 Deaths 0
## 6 NA Afghanistan 2020-01-23 Deaths_per_Confirmed 0
# Daily totals per report type for South Korea.
# Filtering before aggregating avoids summarising every country only to
# discard all but one, and .groups = "drop" returns an ungrouped tibble.
# NOTE(review): Deaths_per_Confirmed is summed like the raw counts; that is
# only meaningful if the country has a single row per date - confirm.
sk_data <- time_series_long_joined_counts %>%
  filter(Country_Region == "Korea, South") %>%
  group_by(Country_Region, Report_Type, Date) %>%
  summarise(Counts = sum(Counts), .groups = "drop")

# Animated line chart revealed along the Date axis.
# The points get one group per row so that already-revealed points stay on
# screen; an additional ungrouped geom_point() layer would only redraw the
# newest point and is therefore omitted.
p <- ggplot(
  sk_data,
  aes(x = Date, y = Counts, color = Report_Type, fill = Report_Type)
) +
  geom_line() +
  geom_point(aes(group = seq_along(Date))) +
  ggtitle("South Korea COVID-19 Cases") +
  transition_reveal(Date)

# Hold the final frame for 15 frames before the animation loops.
animate(p, end_pause = 15)